# -*- coding: utf-8 -*-
import json
import os
import urllib.parse
import urllib.request

import pymysql
from bs4 import BeautifulSoup

# Establish the database connection.
# Each setting can be overridden through an environment variable so that
# credentials do not have to live in source control; when a variable is not
# set, the previous hard-coded value is used, so existing runs are unchanged.
# NOTE(review): the fallback password is still committed here -- rotate it and
# drop the default once every deployment supplies MOVIE_DB_PASSWORD.
conn = pymysql.connect(
    host=os.environ.get('MOVIE_DB_HOST', '120.76.154.223'),
    user=os.environ.get('MOVIE_DB_USER', 'dbuser'),
    password=os.environ.get('MOVIE_DB_PASSWORD', 'MYSQLadmin-02'),
    database=os.environ.get('MOVIE_DB_NAME', 'supplier-portal'),
    charset="utf8")
cursor = conn.cursor()

# Crawl Douban movie listings and store one row per movie in the `movie`
# table.
#
# Flow: fetch the tag list, page through the subject listing of each tag
# (20 entries per request), open every subject's detail page, scrape the
# title / director / rating / poster / release date and INSERT them.

# The `tag` query value is the URL-encoded string "热门".
_TAGS_URL = 'https://movie.douban.com/j/search_tags?type=movie&tag=%E7%83%AD%E9%97%A8&source='
_SUBJECTS_URL = 'https://movie.douban.com/j/search_subjects?type=movie&tag='
_PAGE_SIZE = 20
# Seconds to wait on a single HTTP request before giving up; without a
# timeout one stalled connection would hang the crawl indefinitely.
_REQUEST_TIMEOUT = 30


def _fetch(url):
    """Return the raw response body of *url*, closing the response afterwards.

    Raises OSError (which includes urllib.error.URLError / HTTPError and
    socket timeouts) when the request fails.
    """
    with urllib.request.urlopen(url, timeout=_REQUEST_TIMEOUT) as response:
        return response.read()


def _parse_movie(page_html, url):
    """Scrape one movie detail page into the INSERT parameter list.

    Returns [title, director, grade, img, url, initialReleaseDate], or None
    when any of the expected elements is absent from the page.
    """
    sonp = BeautifulSoup(page_html, "lxml")
    heading = sonp.h1.span if sonp.h1 is not None else None
    director_tag = sonp.find(rel="v:directedBy")
    grade_tag = sonp.find(property="v:average")
    img_tag = sonp.find(rel="v:image")
    release_tag = sonp.find(property="v:initialReleaseDate")

    found = (heading, director_tag, grade_tag, img_tag, release_tag)
    if any(element is None for element in found):
        return None
    img = img_tag.attrs.get('src')
    if img is None:
        return None

    return [heading.get_text(), director_tag.get_text(), grade_tag.get_text(),
            img, url, release_tag.get_text()]


try:
    tags = json.loads(_fetch(_TAGS_URL))["tags"]
    for tag in tags:
        i = 0  # page_start offset into the tag's listing
        while True:
            print(tag + "----" + str(i))
            # A failure here is not caught: without the listing there is
            # nothing sensible to continue with for this tag.
            listing = _fetch(
                _SUBJECTS_URL + urllib.parse.quote(tag)
                + '&sort=recommend&page_limit=' + str(_PAGE_SIZE)
                + '&page_start=' + str(i))
            subjects = json.loads(listing)["subjects"]
            if not subjects:
                # An empty page means the listing for this tag is exhausted.
                break
            for subject in subjects:
                url = subject["url"]
                try:
                    page_html = _fetch(url)
                except OSError as err:
                    # One unreachable detail page should not abort the crawl.
                    print("skipped " + url + ": " + str(err))
                    continue
                row = _parse_movie(page_html, url)
                if row is None:
                    print("skipped " + url + ": expected fields not found")
                    continue
                title, director, grade = row[0], row[1], row[2]
                print(title + " " + director + " " + grade)
                cursor.execute(
                    'INSERT INTO movie(title,director,grade,img,url,initialReleaseDate) VALUES (%s,%s,%s,%s,%s,%s)',
                    row)
                # Commit per row so progress survives a later failure.
                conn.commit()
            i += _PAGE_SIZE
finally:
    # Release the database resources whether the crawl finished or failed.
    cursor.close()
    conn.close()
